Libraries
library(lattice)
library(ISLR)
library(MASS)
library(caret)
library(tidyverse)
library(rpart)
library(plyr); library(dplyr)
library(caret)
library(rattle) # Fancy tree plot
library(rpart.plot)
library(dplyr)
library(parallel)
library(Hmisc)
library(e1071)
library(pROC)
library(ggplot2)
library(rpart.plot)
library(VIM)
library(mice)
# Reading the dataset
nbi <- read.csv('/Users/AkshayKale/Documents/github/nbi-predictive-analysis/decision_tree.csv')
# Select attributes
df <- nbi %>% select(adt.cat, adtt.cat, material, state, structure.number, structure.type, type.of.wearing.surface, current.deck, current.substructure, current.superstructure, total.deck.intervention, total.sub.intervention, total.super.intervention, deck.intervention.in.next.3.years, sub.intervention.in.next.3.years, super.intervention.in.next.3.years, precipitation, snowfall, freezethaw, score)
Dataset to model deck of the bridges
# Select attributes
df_deck <- df %>% select(adt.cat, adtt.cat, material, structure.type, type.of.wearing.surface, current.deck, current.substructure, current.superstructure, total.deck.intervention, total.sub.intervention, total.super.intervention, precipitation, snowfall, freezethaw, score, deck.intervention.in.next.3.years)
# Remove null values
df_deck <- na.omit(df_deck)
Preview of the data
head(df_deck)
Training and testing Deck
target_variable <- 'deck.intervention.in.next.3.years'
index = createDataPartition(y=df_deck[[target_variable]], p=0.7, list=FALSE)
train.set = df_deck[index,]
test.set = df_deck[-index,]
positive_class = 'No'
negative_class = 'Yes'
reset.seed <- function()
{
# ensure results are repeatable
set.seed(1337)
}
library(doParallel)
num_cores <- detectCores() #note: you can specify a smaller number if you want
cl <- makePSOCKcluster(num_cores)
registerDoParallel(cl)
reset.seed()
model <- deck.intervention.in.next.3.years ~ adt.cat + adtt.cat + material + structure.type + type.of.wearing.surface + current.deck + current.substructure + current.superstructure + total.deck.intervention + total.sub.intervention + total.super.intervention + precipitation + snowfall + freezethaw + score
tunelengths = seq(from=5, to=100, by=5)
list_sens <- c()
list_spec <- c()
list_f1 <-c()
list_tl <-c()
list_kappa <-c()
list_auc <- c()
probabilities_dt <- data.frame(No=double(), Yes=double())
for(tl in tunelengths) {
rtree_model = train( model,
data = train.set,
method = "rpart",
trControl = trainControl(method = "repeatedcv", search = 'random', repeats = 5,
summaryFunction = twoClassSummary,
classProbs = T, savePredictions = T), tuneLength = tl,
metric='ROC')
rtree_model
# Predict on the training set
tree_class_test <- rtree_model%>% predict(newdata = test.set, type = 'raw')
tree_prob_test <- rtree_model%>% predict(newdata = test.set, type = 'prob')
# Confusion Matrix
metrics <- confusionMatrix(tree_class_test, test.set[[target_variable]])
metricsbyclass <- metrics$byClass
sens <- type.convert(metricsbyclass[1])
list_sens <- c(list_sens, sens)
spec <- type.convert(metricsbyclass[2])
list_spec <- c(list_spec, spec)
f1 <- type.convert(metricsbyclass[7])
list_f1 <- c(list_f1, f1)
tunelen <- tl
list_tl <- c(list_tl, tunelen)
kappa <- type.convert(metrics$overall[2])
list_kappa <- c(list_kappa, kappa)
# Confusion matrix
area <- roc(test.set[[target_variable]], tree_prob_test[[positive_class]], plot = TRUE, print.auc = TRUE, legacy.axes = TRUE)
list_auc <- c(list_auc, area$auc)
# rpart model
rpart.plot(rtree_model$finalModel)
filename<-paste('tree-deck-no-undersample', toString(tl), '.csv', collapse = '')
write.csv(rpart.rules(rtree_model$finalModel, roundint=FALSE, clip.facs=TRUE), filename)
# Probability values
tree_prob_test['Tunelength'] <- rep(tl, length(tree_class_test))
# Concatenate
probabilities_dt <- bind_rows(probabilities, tree_prob_test)
# metrics
metrics
}
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 26 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 25 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 24 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 23 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 22 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 21 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 20 (<-localhost:11678)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 19 (<-localhost:11678)
Setting levels: control = No, case = Yes
Setting direction: controls > cases








































df_metric_dt <- data.frame(list_sens, list_spec, list_f1, list_tl, list_kappa, list_auc)
names(df_metric_dt) <- c('Sensitivity', 'Specificity', 'F1','Tunelength', 'Kappa', 'AUC')
write.csv(probabilities_dt, 'tree-prob-deck-nou.csv')
write.csv(df_metric_dt, 'tree-metric-deck-nou.csv')
df_metric_dt
probabilities_dt
df_metric_dt
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKYXV0aG9yOiBBa3NoYXkgS2FsZQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKZGVzY3JpcHRpb246IERlY2lzaW9uIHRyZWUgbW9kZWxpbmcgb2YgZGVjayB3aXRob3V0IHVuZGVyc2FtcGxpbmcuCi0tLQoKIyMjIyAqTGlicmFyaWVzKgpgYGB7cn0KbGlicmFyeShsYXR0aWNlKQpsaWJyYXJ5KElTTFIpCmxpYnJhcnkoTUFTUykKbGlicmFyeShjYXJldCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocnBhcnQpCmxpYnJhcnkocGx5cik7IGxpYnJhcnkoZHBseXIpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkocmF0dGxlKSAgICAgICAgICAgICAgICAgIyBGYW5jeSB0cmVlIHBsb3QKbGlicmFyeShycGFydC5wbG90KSAKbGlicmFyeShkcGx5cikKbGlicmFyeShwYXJhbGxlbCkKCmxpYnJhcnkoSG1pc2MpCmxpYnJhcnkoZTEwNzEpCmxpYnJhcnkocFJPQykKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCmxpYnJhcnkoVklNKQpsaWJyYXJ5KG1pY2UpCgojIFJlYWRpbmcgdGhlIGRhdGFzZXQKbmJpIDwtIHJlYWQuY3N2KCcvVXNlcnMvQWtzaGF5S2FsZS9Eb2N1bWVudHMvZ2l0aHViL25iaS1wcmVkaWN0aXZlLWFuYWx5c2lzL2RlY2lzaW9uX3RyZWUuY3N2JykKCiMgU2VsZWN0IGF0dHJpYnV0ZXMKZGYgPC0gbmJpICU+JSBzZWxlY3QoYWR0LmNhdCwgYWR0dC5jYXQsIG1hdGVyaWFsLCBzdGF0ZSwgc3RydWN0dXJlLm51bWJlciwgc3RydWN0dXJlLnR5cGUsIHR5cGUub2Yud2VhcmluZy5zdXJmYWNlLCBjdXJyZW50LmRlY2ssIGN1cnJlbnQuc3Vic3RydWN0dXJlLCBjdXJyZW50LnN1cGVyc3RydWN0dXJlLCB0b3RhbC5kZWNrLmludGVydmVudGlvbiwgdG90YWwuc3ViLmludGVydmVudGlvbiwgdG90YWwuc3VwZXIuaW50ZXJ2ZW50aW9uLCBkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMsIHN1Yi5pbnRlcnZlbnRpb24uaW4ubmV4dC4zLnllYXJzLCBzdXBlci5pbnRlcnZlbnRpb24uaW4ubmV4dC4zLnllYXJzLCBwcmVjaXBpdGF0aW9uLCBzbm93ZmFsbCwgZnJlZXpldGhhdywgc2NvcmUpCgpgYGAKCiMjIyMgKkRhdGFzZXQgdG8gbW9kZWwgZGVjayBvZiB0aGUgYnJpZGdlcyoKYGBge3J9CiMgU2VsZWN0IGF0dHJpYnV0ZXMKZGZfZGVjayA8LSBkZiAlPiUgc2VsZWN0KGFkdC5jYXQsIGFkdHQuY2F0LCBtYXRlcmlhbCwgc3RydWN0dXJlLnR5cGUsIHR5cGUub2Yud2VhcmluZy5zdXJmYWNlLCBjdXJyZW50LmRlY2ssIGN1cnJlbnQuc3Vic3RydWN0dXJlLCBjdXJyZW50LnN1cGVyc3RydWN0dXJlLCB0b3RhbC5kZWNrLmludGVydmVudGlvbiwgdG90YWwuc3ViLmludGVydmVudGlvbiwgdG90YWwuc3VwZXIuaW50ZXJ2ZW50aW9uLCAgcHJlY2lwaXRhdGlvbiwgc25vd2ZhbGwsIGZyZWV6ZXRoYXcsIHNjb3JlLCBkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMpCgojIFJlbW92ZSBudWxsIHZhbHVlcwpkZl9kZWNrIDwtIG5hLm9taXQoZGZfZGVjaykKYGBgCgojIyMjICpQcmV2aWV3IG9mIHRoZSBkYXRhKgpgYGB7cn0KaGVhZChkZl9kZWNraykKYGBgCgojIyMjICpUcmFpbmluZyBhbmQgdGVzdGluZyBEZWNrKgpgYGB7cn0KdGFyZ2V0X3ZhcmlhYmxlIDwtICdkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMnCmluZGV4ID0gY3JlYXRlRGF0YVBhcnRpdGlvbih5PWRmX2RlY2tbW3RhcmdldF92YXJpYWJsZV1dLCBwPTAuNywgbGlzdD1GQUxTRSkKdHJhaW4uc2V0ID0gZGZfZGVja1tpbmRleCxdCnRlc3Quc2V0ID0gZGZfZGVja1staW5kZXgsXQoKcG9zaXRpdmVfY2xhc3MgPSAnTm8nCm5lZ2F0aXZlX2NsYXNzID0gJ1llcycgCgpyZXNldC5zZWVkIDwtIGZ1bmN0aW9uKCkKewogICMgZW5zdXJlIHJlc3VsdHMgYXJlIHJlcGVhdGFibGUKICBzZXQuc2VlZCgxMzM3KQp9CmxpYnJhcnkoZG9QYXJhbGxlbCkKbnVtX2NvcmVzIDwtIGRldGVjdENvcmVzKCkgI25vdGU6IHlvdSBjYW4gc3BlY2lmeSBhIHNtYWxsZXIgbnVtYmVyIGlmIHlvdSB3YW50CmNsIDwtIG1ha2VQU09DS2NsdXN0ZXIobnVtX2NvcmVzKQpyZWdpc3RlckRvUGFyYWxsZWwoY2wpCgpyZXNldC5zZWVkKCkKbW9kZWwgPC0gZGVjay5pbnRlcnZlbnRpb24uaW4ubmV4dC4zLnllYXJzIH4gYWR0LmNhdCArIGFkdHQuY2F0ICsgbWF0ZXJpYWwgKyBzdHJ1Y3R1cmUudHlwZSArIHR5cGUub2Yud2VhcmluZy5zdXJmYWNlICsgY3VycmVudC5kZWNrICsgY3VycmVudC5zdWJzdHJ1Y3R1cmUgKyBjdXJyZW50LnN1cGVyc3RydWN0dXJlICsgdG90YWwuZGVjay5pbnRlcnZlbnRpb24gKyB0b3RhbC5zdWIuaW50ZXJ2ZW50aW9uICsgdG90YWwuc3VwZXIuaW50ZXJ2ZW50aW9uICsgIHByZWNpcGl0YXRpb24gKyBzbm93ZmFsbCArIGZyZWV6ZXRoYXcgKyBzY29yZQoKdHVuZWxlbmd0aHMgPSBzZXEoZnJvbT01LCB0bz0xMDAsIGJ5PTUpCgpsaXN0X3NlbnMgPC0gYygpCmxpc3Rfc3BlYyA8LSBjKCkKbGlzdF9mMSA8LWMoKQpsaXN0X3RsIDwtYygpCmxpc3Rfa2FwcGEgPC1jKCkKbGlzdF9hdWMgPC0gYygpCnByb2JhYmlsaXRpZXNfZHQgPC0gZGF0YS5mcmFtZShObz1kb3VibGUoKSwgWWVzPWRvdWJsZSgpKQoKZm9yKHRsIGluIHR1bmVsZW5ndGhzKSB7CnJ0cmVlX21vZGVsID0gdHJhaW4oIG1vZGVsLAogICAgICAgICAgICAgICAgIGRhdGEgPSB0cmFpbi5zZXQsCiAgICAgICAgICAgICAgICAgbWV0aG9kID0gInJwYXJ0IiwKICAgICAgICAgICAgICAgICB0ckNvbnRyb2wgPSB0cmFpbkNvbnRyb2wobWV0aG9kID0gInJlcGVhdGVkY3YiLCBzZWFyY2ggPSAncmFuZG9tJywgcmVwZWF0cyA9IDUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN1bW1hcnlGdW5jdGlvbiA9IHR3b0NsYXNzU3VtbWFyeSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY2xhc3NQcm9icyA9IFQsIHNhdmVQcmVkaWN0aW9ucyA9IFQpLCAgdHVuZUxlbmd0aCA9IHRsLAogICAgICAgICAgICAgICAgIG1ldHJpYz0nUk9DJykKCgpydHJlZV9tb2RlbAoKIyBQcmVkaWN0IG9uIHRoZSB0cmFpbmluZyBzZXQKdHJlZV9jbGFzc190ZXN0IDwtIHJ0cmVlX21vZGVsJT4lIHByZWRpY3QobmV3ZGF0YSA9IHRlc3Quc2V0LCB0eXBlID0gJ3JhdycpCnRyZWVfcHJvYl90ZXN0IDwtIHJ0cmVlX21vZGVsJT4lIHByZWRpY3QobmV3ZGF0YSA9IHRlc3Quc2V0LCB0eXBlID0gJ3Byb2InKQoKIyBDb25mdXNpb24gTWF0cml4Cm1ldHJpY3MgPC0gY29uZnVzaW9uTWF0cml4KHRyZWVfY2xhc3NfdGVzdCwgdGVzdC5zZXRbW3RhcmdldF92YXJpYWJsZV1dKQptZXRyaWNzYnljbGFzcyA8LSBtZXRyaWNzJGJ5Q2xhc3MKCnNlbnMgPC0gdHlwZS5jb252ZXJ0KG1ldHJpY3NieWNsYXNzWzFdKQpsaXN0X3NlbnMgPC0gYyhsaXN0X3NlbnMsIHNlbnMpCgpzcGVjIDwtIHR5cGUuY29udmVydChtZXRyaWNzYnljbGFzc1syXSkKbGlzdF9zcGVjIDwtIGMobGlzdF9zcGVjLCBzcGVjKQoKZjEgPC0gdHlwZS5jb252ZXJ0KG1ldHJpY3NieWNsYXNzWzddKQpsaXN0X2YxIDwtIGMobGlzdF9mMSwgZjEpCgp0dW5lbGVuIDwtIHRsCmxpc3RfdGwgPC0gYyhsaXN0X3RsLCB0dW5lbGVuKQoKa2FwcGEgPC0gdHlwZS5jb252ZXJ0KG1ldHJpY3Mkb3ZlcmFsbFsyXSkKbGlzdF9rYXBwYSA8LSBjKGxpc3Rfa2FwcGEsIGthcHBhKQoKIyBDb25mdXNpb24gbWF0cml4CmFyZWEgPC0gcm9jKHRlc3Quc2V0W1t0YXJnZXRfdmFyaWFibGVdXSwgdHJlZV9wcm9iX3Rlc3RbW3Bvc2l0aXZlX2NsYXNzXV0sIHBsb3QgPSBUUlVFLCBwcmludC5hdWMgPSBUUlVFLCBsZWdhY3kuYXhlcyA9IFRSVUUpCmxpc3RfYXVjIDwtIGMobGlzdF9hdWMsIGFyZWEkYXVjKQoKIyBycGFydCBtb2RlbApycGFydC5wbG90KHJ0cmVlX21vZGVsJGZpbmFsTW9kZWwpCmZpbGVuYW1lPC1wYXN0ZSgndHJlZS1kZWNrLW5vLXVuZGVyc2FtcGxlJywgdG9TdHJpbmcodGwpLCAnLmNzdicsIGNvbGxhcHNlID0gJycpCndyaXRlLmNzdihycGFydC5ydWxlcyhydHJlZV9tb2RlbCRmaW5hbE1vZGVsLCByb3VuZGludD1GQUxTRSwgY2xpcC5mYWNzPVRSVUUpLCBmaWxlbmFtZSkKCiMgUHJvYmFiaWxpdHkgdmFsdWVzCnRyZWVfcHJvYl90ZXN0WydUdW5lbGVuZ3RoJ10gPC0gcmVwKHRsLCBsZW5ndGgodHJlZV9jbGFzc190ZXN0KSkKCiMgQ29uY2F0ZW5hdGUKcHJvYmFiaWxpdGllc19kdCA8LSBiaW5kX3Jvd3MocHJvYmFiaWxpdGllcywgdHJlZV9wcm9iX3Rlc3QpCgojIG1ldHJpY3MKbWV0cmljcwp9CgpkZl9tZXRyaWNfZHQgPC0gZGF0YS5mcmFtZShsaXN0X3NlbnMsIGxpc3Rfc3BlYywgbGlzdF9mMSwgbGlzdF90bCwgbGlzdF9rYXBwYSwgbGlzdF9hdWMpCm5hbWVzKGRmX21ldHJpY19kdCkgPC0gYygnU2Vuc2l0aXZpdHknLCAnU3BlY2lmaWNpdHknLCAnRjEnLCdUdW5lbGVuZ3RoJywgJ0thcHBhJywgJ0FVQycpCgp3cml0ZS5jc3YocHJvYmFiaWxpdGllc19kdCwgJ3RyZWUtcHJvYi1kZWNrLW5vdS5jc3YnKQp3cml0ZS5jc3YoZGZfbWV0cmljX2R0LCAndHJlZS1tZXRyaWMtZGVjay1ub3UuY3N2JykKCmRmX21ldHJpY19kdApwcm9iYWJpbGl0aWVzX2R0CgojIHZhcmlhYmxlIGltcG9ydGFuY2UgLT4gcmFuZG9tIGZvcmVzdCAoZm9yIGFsbCB0dW5lbGVuZ3RoKSAtPiBSYW5raW5nIChmb3IgYWxsIHZhcmlhYmxlcykgCiMgVW5kZXJzdGFuZGluZyB0aGUgcmVsZXZhbmNlIG9mIHRoZSBwYXJhbWV0ZXJzCgpgYGAKCmBgYHtyfQpkZl9tZXRyaWNfZHQKYGBgCgo=